require(lavaan)
require(semPlot)
require(AICcmodavg)
require(ggplot2)
require(ggthemes)

rm(list=ls())

### This script will read in all stickleback data and run a path analysis on it.

# Load in the data
dataNoVid <- read.csv('FinalAllNoVidData_April20.2022.csv')

#I did outlier analysis in the stats video, so I will remove those values
dataNoVidOutlier <- dataNoVid[-c(2,5,13,14),]
origFibrScore <- dataNoVid[-c(2,5,13,14),]$Fibrosis.Score



#### Do size corrections on the force vs. CSA data ####
lmSizeCorrOutlier <- lm(data=dataNoVidOutlier, SLslope~CSA)
summary(lmSizeCorrOutlier)
dataNoVidOutlier$SLslopeResid <- residuals(lmSizeCorrOutlier)
dataNoVidOutlier$predict <- predict(lmSizeCorrOutlier)


#### Correct fibrosis scores so that they run from 0-1. This way coefficient estimates will represent maximum effect of fibrosis.
dataNoVidOutlier$Fibrosis.Score <- origFibrScore/4

mean(origFibrScore[dataNoVidOutlier$Treatment=='PBS'])
mean(origFibrScore[dataNoVidOutlier$Treatment=='Alum'])

#Create dummy variable to represent interaction effect of treatment on stiffness vs. curvature
dataNoVidOutlier$Treatment <- as.factor(dataNoVidOutlier$Treatment)
dataNoVidOutlier$TreatmentBin <- ifelse(dataNoVidOutlier$Treatment=="Alum",1,0)
dataNoVidOutlier$SlopeTreatInteraction <- dataNoVidOutlier$TreatmentBin * dataNoVidOutlier$SLslopeResid




#Test the direct effect of treatment on fibrosis to confirm that the treatment had the intended effect.

lmDE1 <- lm(data=dataNoVidOutlier, Fibrosis.Score~Treatment)
summary(lmDE1)

#Now that we know the treatment causes fibrosis, we can see if fibrosis directly affects all the other variables.

lmDE2 <- lm(data=dataNoVidOutlier, SLslopeResid~Fibrosis.Score)
summary(lmDE2)

lmDE3 <- lm(data=dataNoVidOutlier, Highest.Max.Curve~Fibrosis.Score)
summary(lmDE3)

lmDE4 <- lm(data=dataNoVidOutlier, maxAngVel~Fibrosis.Score)
summary(lmDE4)

lmDE5 <- lm(data=dataNoVidOutlier, maxLinVel~Fibrosis.Score)
summary(lmDE5)


#Or one model that includes everything. Similar to path analysis but without heirarchical structure.
lmDE6 <- lm(data=dataNoVidOutlier, maxAngVel~Fibrosis.Score+SLslopeResid+Highest.Max.Curve)
summary(lmDE6)

lmDE7 <- lm(data=dataNoVidOutlier, maxLinVel~Fibrosis.Score+SLslopeResid+Highest.Max.Curve)
summary(lmDE7)


#The point of this that we can find some effects, but we can't see the overall effect of treatment on final performance metrics.
#It is only by putting it all into a hierarchical path analysis that we can pick out the true effects

PBSData <- dataNoVidOutlier[dataNoVidOutlier$Treatment=='PBS',]
tempLM <- lm(data=PBSData, Highest.Max.Curve~SLslopeResid)
summary(tempLM)

#Define hypothesized base model for path analysis
    #This model includes our linear path from treatment->fibrosis score->stiffness->curvature->performance. It also includes indirect links from fibrosis score to curvature and performance to see if there are other effects that we missed.

modBase <-  
    'Fibrosis.Score ~ a*TreatmentBin
    SLslopeResid ~ b*Fibrosis.Score
    SlopeTreatInteraction ~ c*TreatmentBin + d*SLslopeResid
    Highest.Max.Curve ~ e*SLslopeResid + f*SlopeTreatInteraction
    maxAngVel ~ g*Highest.Max.Curve
    maxLinVel ~ h*Highest.Max.Curve
    '
  

#Now make the other hypothesized paths

  # + Behavior
modnB <-  
  'Fibrosis.Score ~ a*TreatmentBin
    SLslopeResid ~ b*Fibrosis.Score
    SlopeTreatInteraction ~ c*TreatmentBin + d*SLslopeResid
    Highest.Max.Curve ~ e*SLslopeResid + f*Fibrosis.Score + g*SlopeTreatInteraction
    maxAngVel ~ h*Highest.Max.Curve
    maxLinVel ~ i*Highest.Max.Curve
    '


  # +Force Transfer
modnF <-  
  'Fibrosis.Score ~ a*TreatmentBin
    SLslopeResid ~ b*Fibrosis.Score
    SlopeTreatInteraction ~ c*TreatmentBin + d*SLslopeResid
    Highest.Max.Curve ~ e*SLslopeResid + f*SlopeTreatInteraction
    maxAngVel ~ g*Highest.Max.Curve + h*SLslopeResid
    maxLinVel ~ i*Highest.Max.Curve + j*SLslopeResid
    '


  # + Behavior + Force transfer
modnBF <-  
  'Fibrosis.Score ~ a*TreatmentBin
    SLslopeResid ~ b*Fibrosis.Score
    SlopeTreatInteraction ~ c*TreatmentBin + d*SLslopeResid
    Highest.Max.Curve ~ e*SLslopeResid + f*Fibrosis.Score + g*SlopeTreatInteraction
    maxAngVel ~ h*Highest.Max.Curve + i*SLslopeResid
    maxLinVel ~ j*Highest.Max.Curve + k*SLslopeResid
    '



### Look at model summaries to decide which is best

    #Data frame to hold all model statistics
modIndDF <- data.frame(model=c('Base','Base+B','Base+F','Base+B+F'), AIC.score=rep(NA,4), BIC.score=rep(NA,4), deltaAIC=rep(NA,4),deltaBIC=rep(NA,4),AIC.weight=rep(NA,4),BIC.weight=rep(NA,4))


pathModBase <- sem(modBase, data=dataNoVidOutlier)
#summary(pathModBase, standardized=TRUE,rsquare=TRUE,fit.measures=TRUE) #standardized standardizes the parameter estimates, rsquare displays r squared for each dependent variable
tempa <- fitMeasures(pathModBase)
tempa
#AICc(pathModBase) #Courtesy of AICcmodavg package!


pathModnB <- sem(modnB, data=dataNoVidOutlier)
tempb <- fitMeasures(pathModnB)
tempb


pathModnF <- sem(modnF, data=dataNoVidOutlier)
tempc <- fitMeasures(pathModnF)
tempc


pathModnBF <- sem(modnBF, data=dataNoVidOutlier)
tempd <- fitMeasures(pathModnBF)
tempd





modIndDF$AIC.score <- c(tempa[19],tempb[19],tempc[19],tempd[19])#,tempe[19],tempf[19],tempg[19],temph[19])
modIndDF$BIC.score <- c(tempa[20],tempb[20],tempc[20],tempd[20])#,tempe[20],tempf[20],tempg[20],temph[20])

###Calculate AIC and BIC Weights for each model
minAIC <- min(modIndDF$AIC.score)
minBIC <- min(modIndDF$BIC.score)

modIndDF$deltaAIC <- modIndDF$AIC.score-minAIC
modIndDF$deltaBIC <- modIndDF$BIC.score-minBIC

    #Formula for AIC weight is exp(-0.5*deltaAIC)/sum all weights
tempAICw <- exp(-0.5*modIndDF$deltaAIC)
modIndDF$AIC.weight <- tempAICw/sum(tempAICw,na.rm=FALSE)

tempBICw <- exp(-0.5*modIndDF$deltaBIC)
modIndDF$BIC.weight <- tempBICw/sum(tempBICw,na.rm=FALSE)


modIndDF


#It appears that Base+B is the best model
summary(pathModnB, standardized=FALSE,rsquare=TRUE,fit.measures=TRUE)
fitMeasures(pathModnB)


################################################################################

#The fit indices determined that Base+B is the best model.Now we proceed with Base+B and get all the statistics from here.



#Also include indirect and total effects

modFinal <-  
  'Fibrosis.Score ~ a*TreatmentBin
    SLslopeResid ~ b*Fibrosis.Score
    SlopeTreatInteraction ~ h*TreatmentBin + i*SLslopeResid
    Highest.Max.Curve ~ c*SLslopeResid + d*Fibrosis.Score + g*SlopeTreatInteraction
    maxAngVel ~ e*Highest.Max.Curve
    maxLinVel ~ f*Highest.Max.Curve
    
    totEff.Fib.P1P.Alum := (b*c+b*i*g+d)*e   #Total effect of fibrosis on P1P including stiffness effect and behavior effect
    totEff.Fib.P2P.Alum := (b*c+b*i*g+d)*f   #Total effect of fibrosis on P2P including stiffness effect and behavior effect
    totEff.Fib.P1P.PBS := (b*c+d)*e   #Total effect of fibrosis on P1P including stiffness effect and behavior effect
    totEff.Fib.P2P.PBS := (b*c+d)*f   #Total effect of fibrosis on P2P including stiffness effect and behavior effect
    
    
    indEff.F.P1P.Stf.PBS := b*c*e   #Indirect effect of fibrosis on P1P VIA stiffness in PBS fish
    indEff.F.P1P.Beh := d*e   #Indirect effect of fibrosis on P1P VIA behavioral effect
    
    indEff.F.P2P.Stf.PBS := b*c*f   #Indirect effect of fibrosis on P1P VIA stiffness in PBS fish
    indEff.F.P2P.Beh := d*f   #Indirect effect of fibrosis on P1P VIA behavioral effect
  '

#Run the model
pathModFinal <- sem(modFinal, data=dataNoVidOutlier)

#dataNoVidOutlier$Treatment <- ordered(dataNoVidOutlier$Treatment, levels =c("PBS", "Alum"))

summary(pathModFinal, standardized=TRUE,rsquare=TRUE,fit.measures=TRUE)

#Total effects are the sum of the direct effect and the indirect effects.


#SEM modeling is structural equation modeling


parameterEstimates(pathModFinal)
standardizedSolution(pathModFinal)
semPaths(pathModFinal, whatLabels = 'std', layout='spring') #std gives standardized estimates. whatlabels=par would give non-standardized values.
semPaths(pathModFinal, whatLabels = 'est', layout='spring') #non-standardized estimates
#other layouts: tree, tree2, circle, circle2, more

#"Estimate" is the coefficient estimate, non-standardized
#"std.all" is the standardized coefficient (aka an increase of 1 st.dev of x corresponds to a change of n standard deviations in y)



#### Find effect sizes from path analysis ####

#totEff1 has coefficient estimate -0.221 for the TOTAL effect of fibrosis on angular velocity
fib0MeanAng <- mean(dataNoVidOutlier$maxAngVel[dataNoVidOutlier$Fibrosis.Score==0],na.rm = TRUE)
maxEffectTotAng <- -0.221
percEffTotAng <- maxEffectTotAng/fib0MeanAng

#totEff2 has coefficient estimate +0.092 for the TOTAL effect of fibrosis on linear distance
fib0MeanLin <- mean(dataNoVidOutlier$maxLinVel[dataNoVidOutlier$Fibrosis.Score==0],na.rm = TRUE)
maxEffectTotLin <- 0.092
percEffTotLin <- maxEffectTotLin/fib0MeanLin

#indEff1 has coefficient estimate 0.099 for the effect of fibrosis on angular velocity via Stiffness
maxEffectAng1 <- .099
percEffIndAng1 <- maxEffectAng1/fib0MeanAng

#indEff2 has coefficient estimate -0.320 for the effect of fibrosis on angular velocity via behavior (skip stiffness)
maxEffectAng2 <- -0.320
percEffIndAng2 <- maxEffectAng2/fib0MeanAng

#indEff3 has coefficient estimate -0.042 for the effect of fibrosis on linear distance via Stiffness
maxEffectLin1 <- -0.042
percEffIndLin1 <- maxEffectLin1/fib0MeanLin

#indEff4 has coefficient estimate +0.134 for the effect of fibrosis on linear distance via Behavior (skip stiffness)
maxEffectLin2 <- 0.134
percEffIndLin2 <- maxEffectLin2/fib0MeanLin

for(dumb in 1:1){
  print(percEffTotAng)
  print(percEffTotLin)
  print(percEffIndAng1)
  print(percEffIndAng2)
  print(percEffIndLin1)
  print(percEffIndLin2)
}



### We want to show the raw data in the figure, so now we plot the individual regressions that are shown in the path analysis
    #Have to correct for Stiffness when plotting the effect of fibrosis on body curvature.
    #Do this by getting the residuals of Stiffness~Fibrosis, then plotting that vs. body curvature
      #This isn't quite the same as what the path analysis does in terms of p-values, but works for the plot





ggplot(dataNoVidOutlier, aes(x=reorder(Treatment, Fibrosis.Score),y=Fibrosis.Score)) + 
  geom_boxplot(aes(),size=1,alpha=1) +
  geom_point(color="black", size=5, position = position_jitter(width=0.3, seed = 47))+
  theme_few()

ggplot(dataNoVidOutlier, aes(x=Fibrosis.Score,y=SLslopeResid)) + 
  geom_point(aes(),size=4,alpha=1) +
  stat_smooth(method = "lm", col = "black", lwd=2) +
  theme_few()


lmSTcorr <- lm(data=dataNoVidOutlier, Highest.Max.Curve~Fibrosis.Score)
  summary(lmSTcorr)
  dataNoVidOutlier$FS.Resid <- as.vector(rep(NA,nrow(dataNoVidOutlier)))
  resids <- resid(lmSTcorr)
  residNames <- names(resids)
    for(rowN in 1:length(resids)){
      rowMatch <- which(dataNoVidOutlier$X==residNames[rowN])
      dataNoVidOutlier$FS.Resid[rowMatch] <- (resids[rowN])
  }

ggplot(dataNoVidOutlier, aes(x=FS.Resid,y=SLslopeResid)) + 
  geom_point(aes(),size=4,alpha=1) +
  stat_smooth(method = "lm", lwd=2) +
  theme_few()
  
ggplot(dataNoVidOutlier, aes(x=SLslopeResid,y=Highest.Max.Curve, shape=Treatment)) + 
  geom_point(aes(),size=4,alpha=1) +
  stat_smooth(method = "lm", lwd=2) +
  scale_shape_manual(values = c(16,1)) +
  theme_few() +
  theme(legend.position = "none")

ggplot(dataNoVidOutlier, aes(x=Fibrosis.Score,y=Highest.Max.Curve)) + 
  geom_point(aes(),size=4,alpha=1) +
  stat_smooth(method = "lm", lwd=2) +
  theme_few()

lma <- lm(data=dataNoVidOutlier, Highest.Max.Curve~Fibrosis.Score)
summary(lma)
lmb <- lm(data=dataNoVidOutlier, SLslopeResid~Fibrosis.Score)
summary(lmb)
lmc <- lm(data=dataNoVidOutlier, Highest.Max.Curve~Fibrosis.Score+SLslopeResid)
summary(lmc)

plot(data=dataNoVidOutlier, Highest.Max.Curve~Fibrosis.Score+SLslopeResid)
plot(data=dataNoVidOutlier, SLslopeResid~Fibrosis.Score)
plot(data=dataNoVidOutlier, Highest.Max.Curve~SLslopeResid, color=Treatment)

scatterplot3d(type="h",x=dataNoVidOutlier$Fibrosis.Score,y=dataNoVidOutlier$SLslopeResid,z=dataNoVidOutlier$Highest.Max.Curve)

ggplot(dataNoVidOutlier, aes(x=Fibrosis.Score,y=Highest.Max.Curve)) + 
  geom_point(aes(),size=4,alpha=1) +
  stat_smooth(method = "lm", col = "black", lwd=2) +
  theme_few()


ggplot(dataNoVidOutlier, aes(x=SLslopeResid,y=Highest.Max.Curve)) + 
  geom_point(aes(),size=4,alpha=1) +
  stat_smooth(method = "lm", col = "black", lwd=2) +
  theme_few()

ggplot(dataNoVidOutlier, aes(x=Highest.Max.Curve,y=maxAngVel)) + 
  geom_point(aes(),size=4,alpha=1) +
  stat_smooth(method = "lm", col = "black", lwd=2) +
  theme_few()

ggplot(dataNoVidOutlier, aes(x=Highest.Max.Curve,y=maxLinVel)) + 
  geom_point(aes(),size=4,alpha=1) +
  stat_smooth(method = "lm", col = "black", lwd=2) +
  theme_few()

